Normalize reads and create DGE objects
library(tidyverse)
library(edgeR)
Read the Kallisto estimated counts and combine them into one data frame
# List the entries of the Kallisto output folder; each entry is treated as a
# sample directory that contains an abundance.tsv file.
files <- dir("../input/Kallisto_output/", include.dirs = TRUE)
files %>% head()

# Read every abundance.tsv (one character column followed by four numeric
# columns) into a list of tibbles named after the sample directory.
counts.list <- map(files, ~ read_tsv(
  file = file.path("..", "input", "Kallisto_output", .x, "abundance.tsv"),
  col_types = "cdddd"
))
names(counts.list) <- files

# The count columns are bound side by side below, which is only valid when
# every sample lists the same target_id values in the same order. Fail loudly
# instead of silently misaligning counts.
stopifnot(
  length(files) > 0,
  all(map_lgl(
    counts.list,
    ~ identical(.x[["target_id"]], counts.list[[1]][["target_id"]])
  ))
)

# Pull est_counts from each sample as a plain vector. The resulting list keeps
# the sample names, so the columns are already named by sample and no
# ".est_counts" suffix has to be stripped afterwards.
counts <- map(counts.list, ~ pull(.x, est_counts)) %>%
  bind_cols(counts.list[[1]][, "target_id"], .)

# Treat missing counts as zero.
counts[is.na(counts)] <- 0
counts

# Save the raw (un-normalized) count table.
write_csv(counts, "../output/2018-timecourse_V3.0_raw_counts_.csv.gz")
create sample description data frame
# Read the tube-number legend. The sheet has 17 columns: seven are skipped,
# eight are read as text and the last two are parsed as dates. Empty cells and
# the literal "na" are treated as missing.
key <- readxl::read_excel(
  path = "../input/tube_no_legend_time_course_2018.xlsx",
  na = c("", "na"),
  col_types = c(
    "text", "text", "text", "skip",
    "text", "skip", "skip", "skip",
    "text", "text", "text", "skip",
    "text", "skip", "skip",
    "date", "date"
  )
)

# Keep only the clock time of sampling_time_specific, as an "HH:MM:SS" string.
key <- mutate(
  key,
  sampling_time_specific = format(sampling_time_specific, format = "%H:%M:%S")
)
key
Create a reformatted tube_no (tube_no_2): lower-cased, with the number after "q_" zero-padded to three digits
# Derive tube_no_2 from tube_no in two padding passes on the lower-cased id:
#   1. "q_" followed by a single digit 1-9 gets two leading zeros;
#   2. "q_" followed by two digits (first one 1-9) gets one leading zero.
# In both passes the number must be followed by "_" or the end of the string.
key <- key %>%
  mutate(
    tube_no_2 = str_replace(tolower(tube_no), "q_([1-9](_|$))", "q_00\\1"),
    tube_no_2 = str_replace(tube_no_2, "q_([1-9][0-9](_|$))", "q_0\\1")
  ) %>%
  # Put the original and reformatted ids first, then all remaining columns.
  select(tube_no, tube_no_2, everything())
key
# One row per Kallisto output entry. The tube id ("q_" plus three digits, with
# an optional "_d8" suffix) is extracted from the entry name; entries without
# such an id get NA.
samples <- tibble(file = files) %>%
  mutate(tube_no_2 = str_extract(file, pattern = "q_[0-9]{3}(_d8)?"))
samples
# Attach the sample metadata from `key` to each Kallisto sample. The join
# column is named explicitly so the match cannot silently change if the two
# tables ever share another column name, and so no "Joining, by = ..." message
# is needed to know what was matched. Samples without a match in `key` keep NA
# in the metadata columns.
samples <- left_join(samples, key, by = "tube_no_2")

# Zero-pad sampling_day to two characters, then build the group label as
# genotype-soil_trt-sampling_day-sampling_time. str_c() yields NA for a sample
# if any of the four parts is NA.
samples <- samples %>%
  mutate(
    sampling_day = str_pad(sampling_day, width = 2, side = "left", pad = "0"),
    group = str_c(genotype, soil_trt, sampling_day, sampling_time, sep = "-")
  )
samples
# Convert the count table into a numeric matrix with target_id as row names,
# rounded to whole numbers.
counts2 <- as.data.frame(counts)
counts2 <- column_to_rownames(counts2, var = "target_id")
counts2 <- round(as.matrix(counts2), 0)
# Sample annotation for the DGEList: drop the tube/pot/plant identifiers and
# the specific sampling day/time columns, then use `file` as the row names.
samples2 <- select(
  samples,
  -c(
    tube_no_2, tube_no, pot, plant_no,
    sampling_day_specific, sampling_time_specific
  )
)
samples2 <- column_to_rownames(as.data.frame(samples2), var = "file")
# Bundle the rounded count matrix and the per-sample annotation into an edgeR
# DGEList, using the `group` column of samples2 as the grouping variable.
dge <- DGEList(
  counts = counts2,
  samples = samples2,
  group = samples2[["group"]]
)
Normalize: calculate edgeR normalization factors and save the DGE object
# Compute per-library normalization factors with calcNormFactors() using its
# default settings and store them in the DGEList.
dge <- dge %>% calcNormFactors()

# Write the normalized DGEList to disk under the object name `dge`.
save(list = "dge", file = "../output/timecourseDGE.Rdata")
LS0tCnRpdGxlOiAiMDJfTm9ybWFsaXplX1JlYWRzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpOb3JtYWxpemUgcmVhZHMgYW5kIGNyZWF0ZSBER0Ugb2JqZWN0cwoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGVkZ2VSKQpgYGAKCiMjIGdldCByZWFkcyBhbmQgY3JlYXRlIGRhdGEgZnJhbWUKCmBgYHtyfQpmaWxlcyA8LSBkaXIoIi4uL2lucHV0L0thbGxpc3RvX291dHB1dC8iLCBpbmNsdWRlLmRpcnMgPSBUUlVFKQpmaWxlcyAlPiUgaGVhZCgpCmBgYAoKYGBge3J9CmNvdW50cy5saXN0IDwtIG1hcChmaWxlcywgfiByZWFkX3RzdigKICBmaWxlPWZpbGUucGF0aCgiLi4iLCJpbnB1dCIsIkthbGxpc3RvX291dHB1dCIsLiwiYWJ1bmRhbmNlLnRzdiIpLAogIGNvbF90eXBlcyA9ICJjZGRkZCIpKQpuYW1lcyhjb3VudHMubGlzdCkgPC0gZmlsZXMKYGBgCgpgYGB7cn0KY291bnRzIDwtIHNhcHBseShjb3VudHMubGlzdCwgc2VsZWN0LCBlc3RfY291bnRzKSAlPiUgCiAgYmluZF9jb2xzKGNvdW50cy5saXN0W1sxXV1bLCJ0YXJnZXRfaWQiXSwuKQpjb3VudHNbaXMubmEoY291bnRzKV0gPC0gMApjb2xuYW1lcyhjb3VudHMpIDwtIHN1YigiLmVzdF9jb3VudHMiLCIiLGNvbG5hbWVzKGNvdW50cyksZml4ZWQgPSBUUlVFKQpjb3VudHMKYGBgCgpgYGB7cn0Kd3JpdGVfY3N2KGNvdW50cywiLi4vb3V0cHV0LzIwMTgtdGltZWNvdXJzZV9WMy4wX3Jhd19jb3VudHNfLmNzdi5neiIpCmBgYAoKIyMgY3JlYXRlIHNhbXBsZSBkZXNjcmlwdGlvbiBkYXRhIGZyYW1lCgpgYGB7cn0Ka2V5IDwtIHJlYWR4bDo6cmVhZF9leGNlbCgiLi4vaW5wdXQvdHViZV9ub19sZWdlbmRfdGltZV9jb3Vyc2VfMjAxOC54bHN4IiwKICAgICAgICAgICAgICAgICAgICAgICAgICBuYT1jKCIiLCJuYSIpLAogICAgICAgICAgICAgICAgICAgICAgICAgIGNvbF90eXBlcz1jKCJ0ZXh0IiwgInRleHQiLCAidGV4dCIsICJza2lwIiwgInRleHQiLCAic2tpcCIsICJza2lwIiwgInNraXAiLCAidGV4dCIsICJ0ZXh0IiwgInRleHQiLCAic2tpcCIsICJ0ZXh0IiwgInNraXAiLCAic2tpcCIsICJkYXRlIiwgImRhdGUiKSkgICU+JQogIG11dGF0ZShzYW1wbGluZ190aW1lX3NwZWNpZmljPWZvcm1hdChzYW1wbGluZ190aW1lX3NwZWNpZmljLCBmb3JtYXQ9IiVIOiVNOiVTIikpCmtleQpgYGAKCmNyZWF0ZSByZWZvcm1hdHRlZCB0dWJlX25vIApgYGB7cn0Ka2V5IDwtIGtleSAlPiUKICBtdXRhdGUodHViZV9ub18yID0gewogICAgdG9sb3dlcih0dWJlX25vKSAlPiUKICAgICAgc3RyX3JlcGxhY2UoInFfKFsxLTldKF98JCkpIiwgInFfMDBcXDEiKSAlPiUKICAgICAgc3RyX3JlcGxhY2UoInFfKFsxLTldWzAtOV0oX3wkKSkiLCAicV8wXFwxIikgfSkgJT4lCiAgc2VsZWN0KHR1YmVfbm8sIHR1YmVfbm9fMiwgZXZlcnl0aGluZygpKQprZXkKYGBgCgpgYGB7cn0Kc2FtcGxlcyA8LSB0aWJibGUoCiAgZmlsZT1maWxlcywKICB0dWJlX25vXzIgPSBzdHJfZXh0
cmFjdChmaWxlcywgcGF0dGVybiA9ICJxX1swLTldezN9KF9kOCk/IikgCikKc2FtcGxlcwpgYGAKCmBgYHtyfQpzYW1wbGVzIDwtIGxlZnRfam9pbihzYW1wbGVzLCBrZXkpIApzYW1wbGVzIDwtIHNhbXBsZXMgJT4lCiAgbXV0YXRlKHNhbXBsaW5nX2RheT1zdHJfcGFkKHNhbXBsaW5nX2RheSx3aWR0aD0yLHNpZGUgPSAibGVmdCIscGFkPSIwIikpICU+JQogIG11dGF0ZShncm91cD1zdHJfYyhnZW5vdHlwZSwgc29pbF90cnQsIHNhbXBsaW5nX2RheSwgc2FtcGxpbmdfdGltZSxzZXA9Ii0iKSkKc2FtcGxlcwpgYGAKCmBgYHtyfQpjb3VudHMyIDwtIGNvdW50cyAlPiUgCiAgYXMuZGF0YS5mcmFtZSgpICU+JSAKICBjb2x1bW5fdG9fcm93bmFtZXModmFyID0gInRhcmdldF9pZCIpICU+JQogIGFzLm1hdHJpeCgpICU+JQogIHJvdW5kKDApCmBgYAoKYGBge3J9CnNhbXBsZXMyIDwtIHNhbXBsZXMgJT4lCiAgc2VsZWN0KC10dWJlX25vXzIsIC10dWJlX25vLCAtcG90LCAtcGxhbnRfbm8sIC1zYW1wbGluZ19kYXlfc3BlY2lmaWMsIC1zYW1wbGluZ190aW1lX3NwZWNpZmljKSAlPiUKICBhcy5kYXRhLmZyYW1lKCkgJT4lCiAgY29sdW1uX3RvX3Jvd25hbWVzKHZhcj0iZmlsZSIpCmBgYAoKYGBge3J9CmRnZSA8LSBER0VMaXN0KGNvdW50cz1jb3VudHMyLCBzYW1wbGVzPXNhbXBsZXMyLCBncm91cD1zYW1wbGVzMiRncm91cCkKYGBgCgojIyBub3JtYWxpemUKCmBgYHtyfQpkZ2UgPC0gY2FsY05vcm1GYWN0b3JzKGRnZSkKYGBgCgpgYGB7cn0Kc2F2ZShkZ2UsIGZpbGU9Ii4uL291dHB1dC90aW1lY291cnNlREdFLlJkYXRhIikKYGBgCgojIyBzYXZlIHNvbWUgZm9ybWF0dGVkIGNwbSBmaWxlcwoKYGBge3J9CmxvYWQoIi4uL291dHB1dC90aW1lY291cnNlREdFLlJkYXRhIikKYGBgCgpgYGB7cn0KZGdlJHNhbXBsZQpgYGAKCmNwbSBhdmVyYWdlZCBmb3IgZWFjaCBzYW1wbGUgdHlwZQpgYGB7cn0KbG9nMmNwbUdyb3VwIDwtIGNwbUJ5R3JvdXAoZGdlLCBsb2cgPSBUUlVFKQpkaW0obG9nMmNwbUdyb3VwKQpoZWFkKGxvZzJjcG1bLDE6MTBdKQpgYGAKCmBgYHtyfQp3cml0ZS5jc3YobG9nMmNwbUdyb3VwLCAiLi4vb3V0cHV0L2xvZzJjcG1Hcm91cC5jc3YuZ3oiKQpgYGAKCmNwbSBmb3IgZWFjaCBpbmRpdmlkdWFsIHNhbXBsZQpgYGB7cn0Kc2FtcGxlbmFtZXMgPC0gc3RyX2MoZGdlJHNhbXBsZXMkZ3JvdXAsIl9ibGsiLGRnZSRzYW1wbGVzJGJsb2NrKQpsb2cyY3BtU2FtcGxlIDwtIGNwbUJ5R3JvdXAoZGdlLCBsb2cgPSBUUlVFLCBncm91cD1zYW1wbGVuYW1lcykKZGltKGxvZzJjcG1TYW1wbGUpCmhlYWQobG9nMmNwbVNhbXBsZVssMToxMF0pCmxvZzJjcG1TYW1wbGUgPC0gbG9nMmNwbVNhbXBsZSAlPiUgZHBseXI6OnJlbmFtZSgiZ2VuZSI9WDEpCmBgYAoKYGBge3J9CndyaXRlLmNzdihsb2cyY3BtU2FtcGxlLCAiLi4vb3V0cHV0L2xvZzJjcG1TYW1wbGUuY3N2Lmd6IikKd3JpdGVfdHN2KGxvZzJjcG1TYW1wbGUsICIuLi9vdXRwdXQv
bG9nMmNwbVNhbXBsZS50eHQuZ3oiKSAjIC50eHQgaXMgY29tcGF0aWJsZSB0byBHU0VBIGFwcC4KCmBgYA==